library(haven)
library(MuMIn)
library(arrow)
##
## Attaching package: 'arrow'
## The following object is masked from 'package:utils':
##
## timestamp
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::duration() masks arrow::duration()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(nlme)
##
## Attaching package: 'nlme'
##
## The following object is masked from 'package:lme4':
##
## lmList
##
## The following object is masked from 'package:dplyr':
##
## collapse
library(ggeffects)
library(rlang)
##
## Attaching package: 'rlang'
##
## The following objects are masked from 'package:purrr':
##
## %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
## flatten_raw, invoke, splice
##
## The following object is masked from 'package:arrow':
##
## string
library(tableone)
library(r2mlm)
##
## Please cite as:
##
## Shaw, M., Rights, J.D., Sterba, S.S., Flake, J.K. (2023). r2mlm: An R package calculating R-squared measures for multilevel models. Behavior Research Methods, 55(4), 1942-1964. doi:10.3758/s13428-022-01841-4
library(ggpubr)
# Read environment variables from a .env file
readRenviron(".env")
dat_dir <- Sys.getenv("DAT_DIR") # Data directory
man_img_dir <- Sys.getenv("MAN_IMG_DIR") # Manuscript image directory
dem <- readRDS(file.path(dat_dir, "CLEAR3_Biaffect_Leow", "clear3trait.rds")) %>%
as_factor() %>% # Convert haven_labelled variables to factors
mutate(
id = as.character(id),
race_clear123 = as_factor(race_clear123),
ethnicity = as_factor(ethnicity)
) %>%
arrange(id) %>%
mutate(across(where(is.character), ~ if_else(.x == "", "Unknown", .x))) %>%
rename(Age = age, Gender = gender, `House income` = houseincomecat)
# Given to me by Anisha
dem[dem$id == "3085",]$Age <- 24
dem[dem$id == "3100",]$Age <- 23
colnames(dem)
## [1] "id" "Age"
## [3] "race_clear123" "ethnicity"
## [5] "bmi" "filing_status"
## [7] "sex_orient" "Gender"
## [9] "House income" "currentdx_bipolar1"
## [11] "currentdx_bipolar2" "currentdx_cyclothymic"
## [13] "currentdx_othbipolar" "currentdx_bipAMC"
## [15] "currentdx_sindbip" "currentdx_MDD"
## [17] "currentdx_PDD" "currentdx_othDD"
## [19] "currentdx_DDAMC" "currentdx_SiDD"
## [21] "currentdx_Schizophrenia" "currentdx_schizophreniform"
## [23] "currentdx_schizoaffective" "currentdx_delusionaldo"
## [25] "currentdx_briefpsychoticdo" "currentdx_psychoticAMC"
## [27] "currentdx_sipsychoticdo" "currentdx_otherpsychotic"
## [29] "currentdx_SUDalc" "currentdx_SUDsha"
## [31] "currentdx_SUDmj" "currentdx_SUDstim"
## [33] "currentdx_SUDopioid" "currentdx_SUDpcp"
## [35] "currentdx_SUDothhall" "currentdx_SUDinhal"
## [37] "currentdx_SUDother" "currentdx_PanicDO"
## [39] "currentdx_Agoraphobia" "currentdx_SAD"
## [41] "currentdx_Phobi" "currentdx_GAD"
## [43] "currentdx_OtherAnx" "currentdx_AnxAMC"
## [45] "currentdx_siANX" "currentdx_OCD"
## [47] "currentdx_otherOC" "currentdx_OCDAMC"
## [49] "currentdx_siOCD" "currentdx_AN"
## [51] "currentdx_BN" "currentdx_BED"
## [53] "currentdx_otheat" "currentdx_ADHD"
## [55] "currentdx_AcuteStressDO" "currentdx_PTSD"
## [57] "currentdx_AdjustmentDO" "currentdx_Othertrauma"
## [59] "currentdx_PMDD_prov" "currentdx_BPD"
dat_oura <- readRDS(
file.path(dat_dir, "CLEAR3_Biaffect_Leow", "clear3baseline_sleep.rds")) %>%
mutate(
Oura_Sleep_hour = Oura_Sleep_TST / 3600, # Second to hour
sleepdur_yest = sleepdur_yest + 1 # Likert to hours
) %>%
filter(id != 3086 | daterated != ymd("2022-07-31")) # Duplicate entries
colnames(dat_oura)
## [1] "id" "daterated" "cleartrialphase" "sleepdur_yest"
## [5] "SleepLNQuality" "Oura_Sleep_TST" "sedative_YN" "stim_YN"
## [9] "SSRI" "SNRI" "bzd_YN" "Oura_Sleep_hour"
Convert to parquet to make accessible to Python.
dat_oura_raw <- readRDS(
file.path(dat_dir, "CLEAR3_Biaffect_Leow", "clear3baseline_sleep.rds"))
write_parquet(dat_oura_raw, file.path(dat_dir,
"CLEAR3_Biaffect_Leow",
"clear3baseline_sleep.parquet"))
ggplot(dat_oura, aes(sleepdur_yest, Oura_Sleep_hour)) +
geom_jitter() +
geom_abline(intercept = 0, slope = 1) +
labs(title = "Recorded vs reported sleep time across participants (jittered)") +
xlab("Self-reported sleep duration (hour)") +
ylab("Oura-recorded total sleep time (hour)") +
theme(text = element_text(size = 18))
## Warning: Removed 8760 rows containing missing values or values outside the scale range
## (`geom_point()`).
ggplot(dat_oura, aes(SleepLNQuality, Oura_Sleep_hour)) +
geom_point() +
labs(title = "Recorded sleep time vs reported sleep quality across participants") +
xlab("Self-reported sleep quality (higher is better)") +
ylab("Oura-recorded total sleep time (hour)") +
theme(text = element_text(size = 18))
## Warning: Removed 8763 rows containing missing values or values outside the scale range
## (`geom_point()`).
ggplot(dat_oura, aes(sleepdur_yest, SleepLNQuality)) +
geom_jitter() +
labs(title = "Reported sleep time vs quality across participants") +
xlab("Self-reported sleep duration (hour)") +
ylab("Self-reported sleep quality (higher is better)") +
theme(text = element_text(size = 18))
## Warning: Removed 2682 rows containing missing values or values outside the scale range
## (`geom_point()`).
Participants with self-report data:
tmp <- dat_oura %>%
drop_na(sleepdur_yest)
n_distinct(tmp$id)
## [1] 150
Participants with Oura data:
tmp <- dat_oura %>%
drop_na(Oura_Sleep_TST)
n_distinct(tmp$id)
## [1] 60
cor.test(dat_oura$sleepdur_yest, dat_oura$Oura_Sleep_hour, method = "spearman")
## Warning in cor.test.default(dat_oura$sleepdur_yest, dat_oura$Oura_Sleep_hour, :
## Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: dat_oura$sleepdur_yest and dat_oura$Oura_Sleep_hour
## S = 289552223, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.739788
# The values in sleepdur_yest are not the raw number of hours slept, but
# rather an index to the response options:
# 1 - 2 or fewer hours
# 2 - 3 hours
# 3 - 4 hours
# ...
# 9 - 10 or more hours
dat <- read_parquet(file.path(dat_dir, "spleep_pred_10_90_alpha1_2024-11-14.parquet")) %>%
mutate(
Oura_Sleep_hour = Oura_Sleep_TST / 3600, # Second to hour
sleepdur_yest = sleepdur_yest + 1
) %>%
filter(subject != "3086" | date != ymd("2022-07-31")) # Duplicate entries
dat_dem <- dat %>%
left_join(dem, by = c("subject" = "id"))
dat_cmplt_sr <- dat_dem %>%
drop_na(hour_estimate, sleepdur_yest)
dat_cmplt_oura <- dat_dem %>%
drop_na(c(hour_estimate, Oura_Sleep_hour))
colnames(dat)
## [1] "subject" "date" "cleartrialphase" "sleepdur_yest"
## [5] "SleepLNQuality" "Oura_Sleep_TST" "sedative_YN" "stim_YN"
## [9] "SSRI" "SNRI" "bzd_YN" "baseline"
## [13] "dayNumber" "activity" "label" "hour_estimate"
## [17] "n_total_presses" "n_active_hours" "Oura_Sleep_hour"
Exclude the first few days (amounts to a total of 115) to prevent leakage of information from the test set via temporal correlation. 44 participants.
# We can only reliably find the last training days in the full data set
last_train_days <- dat %>%
drop_na(dayNumber) %>%
filter(label == "train") %>%
group_by(subject) %>%
summarize(last_train_day = tail(dayNumber, n = 1))
# Helper function
filter_mutate <- function(.x) {
.x %>%
left_join(last_train_days, by = "subject") %>%
filter(label == "test", dayNumber > last_train_day + 3) %>% # Don't include first three days in test region
mutate(
activity_c = scale(activity),
hour_estimate_c = drop(scale(hour_estimate)),
age_c = scale(Age)
) %>%
group_by(subject) %>%
mutate(
hour_estimate_cluster_centered = drop(scale(hour_estimate, scale = FALSE)),
hour_estimate_cluster_mean = mean(hour_estimate, na.rm = TRUE)
) %>%
ungroup()
}
dat_test_sr <- dat_cmplt_sr %>%
filter_mutate()
dat_test_oura <- dat_cmplt_oura %>%
filter_mutate()
colnames(dat_test_oura)
## [1] "subject" "date"
## [3] "cleartrialphase" "sleepdur_yest"
## [5] "SleepLNQuality" "Oura_Sleep_TST"
## [7] "sedative_YN" "stim_YN"
## [9] "SSRI" "SNRI"
## [11] "bzd_YN" "baseline"
## [13] "dayNumber" "activity"
## [15] "label" "hour_estimate"
## [17] "n_total_presses" "n_active_hours"
## [19] "Oura_Sleep_hour" "Age"
## [21] "race_clear123" "ethnicity"
## [23] "bmi" "filing_status"
## [25] "sex_orient" "Gender"
## [27] "House income" "currentdx_bipolar1"
## [29] "currentdx_bipolar2" "currentdx_cyclothymic"
## [31] "currentdx_othbipolar" "currentdx_bipAMC"
## [33] "currentdx_sindbip" "currentdx_MDD"
## [35] "currentdx_PDD" "currentdx_othDD"
## [37] "currentdx_DDAMC" "currentdx_SiDD"
## [39] "currentdx_Schizophrenia" "currentdx_schizophreniform"
## [41] "currentdx_schizoaffective" "currentdx_delusionaldo"
## [43] "currentdx_briefpsychoticdo" "currentdx_psychoticAMC"
## [45] "currentdx_sipsychoticdo" "currentdx_otherpsychotic"
## [47] "currentdx_SUDalc" "currentdx_SUDsha"
## [49] "currentdx_SUDmj" "currentdx_SUDstim"
## [51] "currentdx_SUDopioid" "currentdx_SUDpcp"
## [53] "currentdx_SUDothhall" "currentdx_SUDinhal"
## [55] "currentdx_SUDother" "currentdx_PanicDO"
## [57] "currentdx_Agoraphobia" "currentdx_SAD"
## [59] "currentdx_Phobi" "currentdx_GAD"
## [61] "currentdx_OtherAnx" "currentdx_AnxAMC"
## [63] "currentdx_siANX" "currentdx_OCD"
## [65] "currentdx_otherOC" "currentdx_OCDAMC"
## [67] "currentdx_siOCD" "currentdx_AN"
## [69] "currentdx_BN" "currentdx_BED"
## [71] "currentdx_otheat" "currentdx_ADHD"
## [73] "currentdx_AcuteStressDO" "currentdx_PTSD"
## [75] "currentdx_AdjustmentDO" "currentdx_Othertrauma"
## [77] "currentdx_PMDD_prov" "currentdx_BPD"
## [79] "last_train_day" "activity_c"
## [81] "hour_estimate_c" "age_c"
## [83] "hour_estimate_cluster_centered" "hour_estimate_cluster_mean"
ggplot(dat_oura %>% drop_na(Oura_Sleep_TST), aes(id)) +
geom_bar() +
labs(x = "Participant ID", y = "Count") +
theme(
text = element_text(size = 18),
axis.text.x = element_text(angle = 90, vjust = 0.5)
)
ggplot(dat_test_oura, aes(subject)) +
geom_bar() +
labs(x = "Participant ID", y = "Count") +
theme(
text = element_text(size = 18),
axis.text.x = element_text(angle = 90, vjust = 0.5)
)
Self-report.
ggplot(dat_test_sr, aes(sleepdur_yest)) +
geom_histogram() +
labs(x = "Self-reported sleep duration")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(dat_test_sr, aes(SleepLNQuality)) +
geom_histogram() +
labs(x = "Self-reported sleep quality")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(dat_test_sr, aes(activity)) +
geom_histogram() +
labs(x = "Activity score")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(dat_test_sr, aes(hour_estimate)) +
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(dat_test_sr, aes(activity_c, sleepdur_yest)) +
geom_jitter()
ggplot(dat_test_sr, aes(hour_estimate, sleepdur_yest)) +
geom_jitter() +
geom_abline(intercept = 0, slope = 1) +
labs(title = "Reported vs estimated sleep (jittered)",
x = "Hour estimate", y = "Self-reported sleep duration") +
theme(text = element_text(size=16))
ggplot(dat_test_sr, aes(hour_estimate, SleepLNQuality)) +
geom_jitter(alpha = 0.2) +
labs(title = "Reported quality vs estimated duration (jittered)",
x = "Hour estimate", y = "Self-reported sleep quality") +
theme(text = element_text(size=16))
Oura ring:
n_presses_breaks = c(400, 1100, 3000, 8100, 22000)
ggplot(dat_test_oura, aes(hour_estimate, Oura_Sleep_hour)) +
geom_jitter(aes(color = n_total_presses)) +
geom_abline(intercept = 0, slope = 1) +
scale_color_gradient(name = "Number of key presses", trans = "log",
breaks = n_presses_breaks, labels = n_presses_breaks) +
labs(title = "Recorded vs estimated sleep (jittered)",
x = "Hour estimate", y = "Oura sleep duration") +
theme(text = element_text(size=16))
ggplot(dat_test_oura, aes(hour_estimate, Oura_Sleep_hour)) +
geom_jitter(aes(color = n_active_hours)) +
geom_abline(intercept = 0, slope = 1) +
labs(title = "Recorded vs estimated sleep (jittered)",
x = "Hour estimate", y = "Oura sleep duration") +
theme(text = element_text(size=16))
Self-report and Oura ring:
# 1887 observations
tmp <- dat_oura %>%
drop_na(sleepdur_yest, Oura_Sleep_hour)
cor.test(tmp$sleepdur_yest, tmp$Oura_Sleep_hour, method = "spearman")
## Warning in cor.test.default(tmp$sleepdur_yest, tmp$Oura_Sleep_hour, method =
## "spearman"): Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: tmp$sleepdur_yest and tmp$Oura_Sleep_hour
## S = 289552223, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.739788
Overall:
# Include all Oura and self-report data points, filter BiAffect data points
# based on train/test label
dat_descr <- dat %>%
left_join(last_train_days, by = "subject") %>%
select(subject, date, sleepdur_yest, Oura_Sleep_hour, hour_estimate,
label, dayNumber, last_train_day) %>%
pivot_longer(sleepdur_yest:hour_estimate, names_to = "variable") %>%
filter(variable != "hour_estimate" | (label == "test" & dayNumber > last_train_day + 3))
max_val = round(max(dat_descr$value, na.rm = TRUE))
g_violin <- ggplot(dat_descr, aes(variable, value, fill = variable)) +
geom_violin() +
geom_jitter(width = 0.05, height = 0, alpha = 0.1) +
scale_x_discrete(labels = c("BiAffect", "Oura ring", "Self-report")) +
scale_y_continuous(minor_breaks = seq(0, max_val)) +
scale_fill_manual(values = c("lightcoral", "aquamarine3", "skyblue4")) +
labs(x = NULL, y = "Sleep duration (hour)") +
# facet_wrap(~ variable, scales = "free_y") +
theme_minimal() +
theme(
text = element_text(size = 16),
legend.position="none",
axis.text.x = element_text(size = 16)
)
g_violin
## Warning: Removed 24691 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 24691 rows containing missing values or values outside the scale range
## (`geom_point()`).
ggsave(file.path(man_img_dir, "violin_sleep.pdf"))
## Saving 7 x 5 in image
## Warning: Removed 24691 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Removed 24691 rows containing missing values or values outside the scale range
## (`geom_point()`).
dat_descr %>%
drop_na(value) %>%
group_by(variable) %>%
summarize(n())
max_lim <- 19
r_coord = 15
tmp <- dat_descr %>%
pivot_wider(id_cols = c(subject, date),
names_from = variable, values_from = value)
gs <- vector("list", length = 3)
gs[[1]] <- ggplot(tmp, aes(hour_estimate, Oura_Sleep_hour)) +
labs(x = "BiAffect sleep duration (hour)",
y = "Oura sleep duration (hour)") +
geom_label(aes(x, y), label = expression(r[s] == 0.40), parse = TRUE,
data = data.frame(x = r_coord, y = r_coord)) # Prevent overdrawing
gs[[2]] <- ggplot(tmp, aes(hour_estimate, sleepdur_yest)) +
labs(x = "BiAffect sleep duration (hour)",
y = "Self-reported sleep duration (hour)") +
geom_label(aes(x, y), label = expression(r[s] == 0.26), parse = TRUE,
data = data.frame(x = r_coord, y = r_coord)) # Prevent overdrawing
gs[[3]] <- ggplot(tmp, aes(sleepdur_yest, Oura_Sleep_hour))+
labs(x = "Self-reported sleep duration (hour)",
y = "Oura sleep duration (hour)") +
geom_label(aes(x, y), label = expression(r[s] == 0.74), parse = TRUE,
data = data.frame(x = r_coord, y = r_coord)) # Prevent overdrawing
gs <- lapply(gs, function(g) {
g +
geom_jitter(width = 0.2, height = 0.2, alpha = 0.5) +
xlim(0, max_lim) +
ylim(0, max_lim) +
coord_fixed() +
theme_minimal() +
theme(text = element_text(size = 16))
})
g_scatter <- ggarrange(plotlist = gs, nrow = 1)
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning: Removed 16518 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning: Removed 15614 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning: Removed 15587 rows containing missing values or values outside the scale range
## (`geom_point()`).
g_scatter
ggarrange(g_violin, g_scatter, labels = c("A", "B"), font.label = list(size = 20), nrow = 2)
## Warning: Removed 24691 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 24691 rows containing missing values or values outside the scale range
## (`geom_point()`).
ggsave(file.path(man_img_dir, "violin_scatter.pdf"))
## Saving 7 x 8 in image
More than enough evidence for random slopes and intercepts:
lmList <- nlme::lmList
m_list <- lmList(Oura_Sleep_hour ~ hour_estimate_c | subject, data = dat_test_oura, na.action = na.omit)
plot(intervals(m_list))
No correlation between random slope and intercept.
m1 <- lmer(sleepdur_yest ~ activity_c + (activity_c || subject), dat_test_sr)
summary(m1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: sleepdur_yest ~ activity_c + ((1 | subject) + (0 + activity_c |
## subject))
## Data: dat_test_sr
##
## REML criterion at convergence: 6279.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.8534 -0.5346 0.0367 0.5671 3.4142
##
## Random effects:
## Groups Name Variance Std.Dev.
## subject (Intercept) 0.68807 0.8295
## subject.1 activity_c 0.01884 0.1372
## Residual 1.56250 1.2500
## Number of obs: 1857, groups: subject, 70
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 6.78254 0.10578 64.12
## activity_c -0.39390 0.04751 -8.29
##
## Correlation of Fixed Effects:
## (Intr)
## activity_c 0.004
There is structure in the residuals. This is because the response variable is discrete. Maybe ordinal regression makes more sense here.
plot(m1)
Residuals also show some left-skew.
qqnorm(resid(m1))
qqline(resid(m1))
hist(resid(m1))
Random effects look okay except for that outlier in the bottom left.
re <- ranef(m1)$subject %>%
rownames_to_column("subject") %>%
pivot_longer(!subject)
ggplot(re, aes(sample = value)) +
geom_qq() +
geom_qq_line() +
facet_wrap(~ name, scales = "free")
plot(predict_response(m1, terms = "activity_c"), show_data = TRUE) +
labs(title = "Predicted values of sleep duration",
x = "Activity (centred)", y = "Sleep duration") +
theme(text = element_text(size = 16))
## Data points may overlap. Use the `jitter` argument to add some amount of
## random variation to the location of data points and avoid overplotting.
cor.test(dat_test_sr$hour_estimate, dat_test_sr$sleepdur_yest, method = "spearman")
## Warning in cor.test.default(dat_test_sr$hour_estimate,
## dat_test_sr$sleepdur_yest, : Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: dat_test_sr$hour_estimate and dat_test_sr$sleepdur_yest
## S = 795121818, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.2550119
m2_sr <- lmer(sleepdur_yest ~ hour_estimate_c + (hour_estimate_c | subject),
dat_test_sr)
summary(m2_sr)
## Linear mixed model fit by REML ['lmerMod']
## Formula: sleepdur_yest ~ hour_estimate_c + (hour_estimate_c | subject)
## Data: dat_test_sr
##
## REML criterion at convergence: 6185.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.7328 -0.5472 0.0519 0.5475 3.3838
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## subject (Intercept) 0.6110 0.7817
## hour_estimate_c 0.1228 0.3504 0.12
## Residual 1.4529 1.2054
## Number of obs: 1857, groups: subject, 70
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 6.86173 0.10174 67.445
## hour_estimate_c 0.46840 0.05919 7.914
##
## Correlation of Fixed Effects:
## (Intr)
## hour_stmt_c 0.114
To get the p values.
m2_nlme <- lme(sleepdur_yest ~ hour_estimate_c,
data = dat_test_sr,
random = ~ hour_estimate_c | subject)
summary(m2_nlme)
## Linear mixed-effects model fit by REML
## Data: dat_test_sr
## AIC BIC logLik
## 6197.27 6230.424 -3092.635
##
## Random effects:
## Formula: ~hour_estimate_c | subject
## Structure: General positive-definite, Log-Cholesky parametrization
## StdDev Corr
## (Intercept) 0.7816851 (Intr)
## hour_estimate_c 0.3503600 0.116
## Residual 1.2053529
##
## Fixed effects: sleepdur_yest ~ hour_estimate_c
## Value Std.Error DF t-value p-value
## (Intercept) 6.861725 0.10173705 1786 67.44569 0
## hour_estimate_c 0.468400 0.05918546 1786 7.91411 0
## Correlation:
## (Intr)
## hour_estimate_c 0.114
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -3.73275027 -0.54720891 0.05188497 0.54745068 3.38377022
##
## Number of Observations: 1857
## Number of Groups: 70
plot(m2_sr)
Residuals also show some left-skew.
qqnorm(resid(m2_sr))
qqline(resid(m2_sr))
hist(resid(m2_sr))
Random effects look okay, although the tails can get a little wonky.
re <- ranef(m2_sr)$subject %>%
rownames_to_column("subject") %>%
pivot_longer(!subject)
ggplot(re, aes(sample = value)) +
geom_qq() +
geom_qq_line() +
facet_wrap(~ name, scales = "free")
plot(predict_response(m2_sr, terms = "hour_estimate_c"), show_data = TRUE, jitter = 0.1) +
labs(title = "Predicted values of sleep duration",
x = "Hour estimate (centred)", y = "Sleep duration") +
theme(text = element_text(size = 16))
hour_mean <- dat_test_sr$hour_estimate_c %@% "scaled:center"
hour_sd <- dat_test_sr$hour_estimate_c %@% "scaled:scale"
pr <- predict_response(m2_sr, terms = "hour_estimate_c") %>%
as.data.frame() %>%
mutate(x = hour_sd * x + hour_mean)
n_presses_breaks = c(400, 1100, 3000, 8100, 22000)
max_est = round(max(dat_test_sr$hour_estimate))
g_pred_sr <- ggplot(pr, aes(x, predicted)) +
geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "grey") +
geom_line(color = "lightcoral", lwd = 1.2) +
geom_ribbon(aes(ymin = conf.low, ymax = conf.high),
fill="lightcoral", alpha = 0.2) +
geom_jitter(
aes(hour_estimate, sleepdur_yest, color = n_total_presses),
width = 0.2, height = 0.1, alpha = 0.5,
data = dat_cmplt_sr) +
scale_color_gradient(name = "Number of key presses", transform = "log",
breaks = n_presses_breaks, labels = n_presses_breaks,
high = "aquamarine3") +
scale_x_continuous(minor_breaks = seq(
0, max_est
)) +
scale_y_continuous(minor_breaks = seq(0, 15), limits = c(0, 15)) +
labs(x = "BiAffect-estimated sleep duration (hour)",
y = "Self-reported sleep duration (hour)") +
coord_fixed() +
theme_minimal() +
theme(
text = element_text(size = 20),
legend.title = element_text(size = 16),
legend.text = element_text(size = 14)
)
g_pred_sr
# ggsave(file.path(man_img_dir, "duration_prediction.pdf"))
r_squared <- r2mlm(m2_sr)
# noquote class
vars <- r_squared$Decompositions[,1]
vars_df_sr <- data.frame(variance = unclass(vars)) %>%
rownames_to_column("component") %>%
filter(component != "fixed, between") %>%
mutate(
component = case_when(
component == "fixed, within" ~ "Fixed effects",
component == "slope variation" ~ "Random slope",
component == "mean variation" ~ "Random intercept",
component == "sigma2" ~ "Residuals"
)
)
vars_df_sr
cols <- rev(c("skyblue2", "skyblue3", "skyblue4", "grey25"))
g_var_sr <- ggplot(vars_df_sr, aes(1, variance)) +
geom_col(aes(fill = fct_rev(component))) +
scale_fill_manual(values = cols, name = "Variance component") +
labs(x = NULL, y = "Proportion of variance") +
scale_x_continuous(breaks = NULL) +
scale_y_continuous(position = "right") +
theme_minimal() +
theme(
text = element_text(size = 20),
legend.title = element_text(size = 16),
legend.text = element_text(size = 14)
)
g_var_sr
g_legends <- ggarrange(get_legend(g_pred_sr), get_legend(g_var_sr), ncol = 1, align = "v")
g_plots <- ggarrange(g_pred_sr, g_var_sr, widths = c(7, 1), legend = "none")
ggarrange(g_plots, g_legends, widths = c(4, 1))
# ggsave(file.path(man_img_dir, "duration_prediction_sr_var.pdf"))
cortest <- cor.test(dat_test_oura$hour_estimate, dat_test_oura$Oura_Sleep_hour,
method = "spearman")
## Warning in cor.test.default(dat_test_oura$hour_estimate,
## dat_test_oura$Oura_Sleep_hour, : Cannot compute exact p-value with ties
cortest
##
## Spearman's rank correlation rho
##
## data: dat_test_oura$hour_estimate and dat_test_oura$Oura_Sleep_hour
## S = 86428092, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.4008605
tmp <- dat_test_oura %>%
mutate(
sedative_YN = as.integer(sedative_YN),
stim_YN = as.integer(stim_YN),
SSRI = as.integer(SSRI),
SNRI = as.integer(SNRI),
age_c = drop(age_c)
)
m2_oura <- lmer(
# Oura_Sleep_hour ~ hour_estimate_c + age_c + sedative_YN + stim_YN + SSRI + SNRI + (hour_estimate_c | subject),
Oura_Sleep_hour ~ hour_estimate_c + (hour_estimate_c | subject),
tmp)
summary(m2_oura)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Oura_Sleep_hour ~ hour_estimate_c + (hour_estimate_c | subject)
## Data: tmp
##
## REML criterion at convergence: 3251.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.4676 -0.5692 0.0217 0.5624 3.4339
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## subject (Intercept) 0.3992 0.6318
## hour_estimate_c 0.2365 0.4863 0.27
## Residual 1.5735 1.2544
## Number of obs: 953, groups: subject, 40
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 7.09929 0.11635 61.017
## hour_estimate_c 0.69065 0.09868 6.999
##
## Correlation of Fixed Effects:
## (Intr)
## hour_stmt_c 0.239
m2_oura_nlme <- lme(Oura_Sleep_hour ~ hour_estimate_c,
data = dat_test_oura,
random = ~ hour_estimate_c | subject)
summary(m2_oura_nlme)
## Linear mixed-effects model fit by REML
## Data: dat_test_oura
## AIC BIC logLik
## 3263.511 3292.657 -1625.756
##
## Random effects:
## Formula: ~hour_estimate_c | subject
## Structure: General positive-definite, Log-Cholesky parametrization
## StdDev Corr
## (Intercept) 0.6318311 (Intr)
## hour_estimate_c 0.4862812 0.271
## Residual 1.2544055
##
## Fixed effects: Oura_Sleep_hour ~ hour_estimate_c
## Value Std.Error DF t-value p-value
## (Intercept) 7.099294 0.1163505 912 61.01644 0
## hour_estimate_c 0.690655 0.0986821 912 6.99878 0
## Correlation:
## (Intr)
## hour_estimate_c 0.239
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -3.46757269 -0.56924621 0.02173227 0.56239771 3.43394523
##
## Number of Observations: 953
## Number of Groups: 40
get_coef_table <- function(m) {
summ <- summary(m)
tab <- summ$tTable
tab <- tab[2:nrow(tab),]
betas <- tab[,1]
ses <- tab[,2]
p_vals <- tab[,5]
data.frame(beta = betas, SE = ses, p = p_vals)
}
# get_coef_table(m2_oura_nlme)
Cluster-centred predictors. Drawback is that the intercept and cluster (participant) means are almost perfectly correlated.
m2_oura_cluster <- lmer(
Oura_Sleep_hour ~ hour_estimate_cluster_centered + hour_estimate_cluster_mean + (hour_estimate_cluster_centered | subject),
dat_test_oura)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00243677 (tol = 0.002, component 1)
summary(m2_oura_cluster)
## Linear mixed model fit by REML ['lmerMod']
## Formula:
## Oura_Sleep_hour ~ hour_estimate_cluster_centered + hour_estimate_cluster_mean +
## (hour_estimate_cluster_centered | subject)
## Data: dat_test_oura
##
## REML criterion at convergence: 3256.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.4812 -0.5742 0.0333 0.5490 3.3276
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## subject (Intercept) 0.4354 0.6598
## hour_estimate_cluster_centered 0.0600 0.2449 0.27
## Residual 1.5719 1.2538
## Number of obs: 953, groups: subject, 40
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 5.18302 0.55199 9.390
## hour_estimate_cluster_centered 0.33962 0.05040 6.739
## hour_estimate_cluster_mean 0.22716 0.06907 3.289
##
## Correlation of Fixed Effects:
## (Intr) hr_stmt_clstr_c
## hr_stmt_clstr_c 0.017
## hr_stmt_clstr_m -0.978 0.023
## optimizer (nloptwrap) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.00243677 (tol = 0.002, component 1)
plot(m2_oura)
qqnorm(resid(m2_oura))
qqline(resid(m2_oura))
hist(resid(m2_oura))
Random effects look okay except for that outlier in the bottom left.
re <- ranef(m2_oura)$subject %>%
rownames_to_column("subject") %>%
pivot_longer(!subject)
ggplot(re, aes(sample = value)) +
geom_qq() +
geom_qq_line() +
facet_wrap(~ name, scales = "free")
r_squared <- r2mlm(m2_oura)
# noquote class
vars <- r_squared$Decompositions[,1]
vars_df <- data.frame(variance = unclass(vars)) %>%
rownames_to_column("component") %>%
filter(component != "fixed, between") %>%
mutate(
component = case_when(
component == "fixed, within" | component == "fixed" ~ "Fixed effects",
component == "slope variation" ~ "Random slope",
component == "mean variation" ~ "Random intercept",
component == "sigma2" ~ "Residuals"
)
)
vars_df
cols <- rev(c("skyblue2", "skyblue3", "skyblue4", "grey20"))
g_var <- ggplot(vars_df, aes(1, variance)) +
geom_col(aes(fill = fct_rev(component))) +
scale_fill_manual(values = cols, name = "Variance component") +
labs(x = NULL, y = "Proportion of variance") +
scale_x_continuous(breaks = NULL) +
scale_y_continuous(position = "right") +
theme_minimal() +
theme(
text = element_text(size = 20),
legend.title = element_text(size = 16),
legend.text = element_text(size = 14)
)
g_var
r2mlm(m2_oura_cluster)
## $Decompositions
## total within between
## fixed, within 0.10669855 0.13600387 NA
## fixed, between 0.04310256 NA 0.2000358
## slope variation 0.05550070 0.07074427 NA
## mean variation 0.17237164 NA 0.7999642
## sigma2 0.62232655 0.79325186 NA
##
## $R2s
## total within between
## f1 0.10669855 0.13600387 NA
## f2 0.04310256 NA 0.2000358
## v 0.05550070 0.07074427 NA
## m 0.17237164 NA 0.7999642
## f 0.14980111 NA NA
## fv 0.20530181 0.20674814 NA
## fvm 0.37767345 NA NA
plot(predict_response(m2_oura, terms = "hour_estimate_c"), show_data = TRUE, jitter = 0.1) +
labs(title = "Predicted values of sleep duration",
x = "Hour estimate (centred)", y = "Sleep duration") +
theme(text = element_text(size = 16))
hour_mean <- dat_test_oura$hour_estimate_c %@% "scaled:center"
hour_sd <- dat_test_oura$hour_estimate_c %@% "scaled:scale"
pr <- predict_response(m2_oura, terms = "hour_estimate_c") %>%
as.data.frame() %>%
mutate(x = hour_sd * x + hour_mean)
n_presses_breaks = c(400, 1100, 3000, 8100, 22000)
max_est = round(max(dat_test_oura$hour_estimate))
g_pred <- ggplot(pr, aes(x, predicted)) +
geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "grey") +
geom_line(color = "lightcoral", lwd = 1.2) +
geom_ribbon(aes(ymin = conf.low, ymax = conf.high),
fill="lightcoral", alpha = 0.2) +
geom_jitter(
aes(hour_estimate, Oura_Sleep_hour, color = n_total_presses),
width = 0.2, height = 0.1,# alpha = 0.5,
data = dat_test_oura) +
scale_color_gradient(name = "Number of key presses", transform = "log",
breaks = n_presses_breaks, labels = n_presses_breaks,
high = "aquamarine3") +
scale_x_continuous(minor_breaks = seq(0, max_est)) +
scale_y_continuous(minor_breaks = seq(0, 15), limits = c(0, 15)) +
labs(x = "BiAffect-estimated sleep duration (hour)",
y = "Oura-estimated sleep duration (hour)") +
coord_fixed() +
theme_minimal() +
theme(
text = element_text(size = 20),
legend.title = element_text(size = 16),
legend.text = element_text(size = 14)
)
g_pred
# ggsave(file.path(man_img_dir, "duration_prediction_oura.pdf"))
g_legends <- ggarrange(get_legend(g_pred), get_legend(g_var), ncol = 1, align = "v")
g_plots <- ggarrange(g_pred, g_var, widths = c(7, 1), legend = "none")
ggarrange(g_plots, g_legends, widths = c(4, 1))
# ggsave(file.path(man_img_dir, "duration_prediction_oura_var.pdf"))
ids <- as.integer(unique(dat_test_sr$subject))
dem_test <- dem %>%
filter(id %in% ids) %>%
select(!c(id, race_clear123, ethnicity, bmi, filing_status, sex_orient)) %>%
select(where(~ !is.numeric(.x) || sum(.x, na.rm = TRUE) > 0)) %>%
mutate(across(starts_with("currentdx"), as.logical))
tab_txt <- capture.output(CreateTableOne(data = dem_test)) %>%
str_replace("^\\s\\s", "") %>%
str_replace("(\\w|\\))\\s+(\\d.*)", "\\1;\\2") %>%
str_replace_all("currentdx_(.+) = TRUE \\(%\\);(.*)", " \\1;\\2")
cat(tab_txt, sep = "\n")
##
## Overall
## n;70
## Age (mean (SD));26.75 (4.62)
## Gender (%)
## Female;65 (92.9)
## Nonbinary;4 ( 5.7)
## Other;1 ( 1.4)
## House income (%)
## less than $15,000;5 ( 7.9)
## $15,000 - $19,999;0 ( 0.0)
## $20,000 - $24,999;3 ( 4.8)
## $25,000 - $29,999;5 ( 7.9)
## $30,000 - $34,999;2 ( 3.2)
## $35,000 - $39,999;4 ( 6.3)
## $40,000 - $49,999;5 ( 7.9)
## $50,000 - $79,999;16 (25.4)
## $80,000 - $99,999;9 (14.3)
## $100,000 or above;14 (22.2)
## bipolar2;3 ( 4.5)
## MDD;38 (56.7)
## PDD;27 (40.3)
## othDD;1 ( 1.5)
## SUDalc;9 (13.4)
## SUDsha;1 ( 1.5)
## SUDmj;6 ( 9.0)
## SUDother;1 ( 1.5)
## PanicDO;4 ( 6.0)
## Agoraphobia;5 ( 7.5)
## SAD;24 (35.8)
## Phobi;7 (10.4)
## GAD;30 (44.8)
## OtherAnx;1 ( 1.5)
## OCD;5 ( 7.5)
## otherOC;1 ( 1.5)
## BED;3 ( 4.5)
## otheat;3 ( 4.5)
## ADHD;14 (20.9)
## PTSD;12 (17.9)
## Othertrauma;2 ( 3.0)
## PMDD_prov;10 (15.9)
## BPD;4 ( 6.0)
ids <- as.integer(unique(dat_test_oura$subject))
dem_test <- dem %>%
filter(id %in% ids) %>%
select(!c(id, race_clear123, ethnicity, bmi, filing_status, sex_orient)) %>%
select(where(~ !is.numeric(.x) || sum(.x, na.rm = TRUE) > 0)) %>%
mutate(across(starts_with("currentdx"), as.logical))
tab_txt <- capture.output(CreateTableOne(data = dem_test)) %>%
str_replace("^\\s\\s", "") %>%
str_replace("(\\w|\\))\\s+(\\d.*)", "\\1;\\2") %>%
str_replace_all("currentdx_(.+) = TRUE \\(%\\);(.*)", " \\1;\\2")
cat(tab_txt, sep = "\n")
##
## Overall
## n;40
## Age (mean (SD));26.77 (4.15)
## Gender (%)
## Female;36 (90.0)
## Nonbinary;3 ( 7.5)
## Other;1 ( 2.5)
## House income (%)
## less than $15,000;4 (11.4)
## $15,000 - $19,999;0 ( 0.0)
## $20,000 - $24,999;3 ( 8.6)
## $25,000 - $29,999;3 ( 8.6)
## $30,000 - $34,999;2 ( 5.7)
## $35,000 - $39,999;3 ( 8.6)
## $40,000 - $49,999;1 ( 2.9)
## $50,000 - $79,999;9 (25.7)
## $80,000 - $99,999;5 (14.3)
## $100,000 or above;5 (14.3)
## bipolar2;2 ( 5.3)
## MDD;23 (60.5)
## PDD;17 (44.7)
## SUDalc;4 (10.5)
## SUDmj;3 ( 7.9)
## PanicDO;2 ( 5.3)
## Agoraphobia;2 ( 5.3)
## SAD;10 (26.3)
## Phobi;4 (10.5)
## GAD;17 (44.7)
## OtherAnx;1 ( 2.6)
## OCD;3 ( 7.9)
## otherOC;1 ( 2.6)
## BED;3 ( 7.9)
## otheat;1 ( 2.6)
## ADHD;9 (23.7)
## PTSD;6 (15.8)
## PMDD_prov;3 ( 8.6)
## BPD;1 ( 2.6)
dat_dvdw <- read_parquet(file.path(dat_dir, "dat_dvdw.parquet"))
colnames(dat_dvdw)
## [1] "subject" "date" "kap"
## [4] "n_active_hours" "prev_n_active_hours" "next_n_active_hours"
dat_dvdw_oura <- dat_dvdw %>%
left_join(dat_oura %>% mutate(id = as.character(id)),
by = c("subject" = "id", "date" = "daterated")) %>%
drop_na(Oura_Sleep_TST) %>%
mutate(
kap_c = drop(scale(kap)),
n_active_hours_c = drop(scale(n_active_hours)),
prev_n_active_hours_c = drop(scale(prev_n_active_hours))
)
colnames(dat_dvdw_oura)
## [1] "subject" "date" "kap"
## [4] "n_active_hours" "prev_n_active_hours" "next_n_active_hours"
## [7] "cleartrialphase" "sleepdur_yest" "SleepLNQuality"
## [10] "Oura_Sleep_TST" "sedative_YN" "stim_YN"
## [13] "SSRI" "SNRI" "bzd_YN"
## [16] "Oura_Sleep_hour" "kap_c" "n_active_hours_c"
## [19] "prev_n_active_hours_c"
m_dvdw <- lmer(Oura_Sleep_hour ~ kap_c + n_active_hours_c + prev_n_active_hours_c + (1 | subject),
dat_dvdw_oura)
summary(m_dvdw)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Oura_Sleep_hour ~ kap_c + n_active_hours_c + prev_n_active_hours_c +
## (1 | subject)
## Data: dat_dvdw_oura
##
## REML criterion at convergence: 3583.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.8836 -0.5606 0.0220 0.5891 3.6701
##
## Random effects:
## Groups Name Variance Std.Dev.
## subject (Intercept) 0.5297 0.7278
## Residual 1.5259 1.2353
## Number of obs: 1069, groups: subject, 41
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 6.96366 0.12332 56.468
## kap_c 0.62161 0.04385 14.176
## n_active_hours_c -0.10696 0.04368 -2.449
## prev_n_active_hours_c -0.03105 0.03855 -0.806
##
## Correlation of Fixed Effects:
## (Intr) kap_c n_ct__
## kap_c -0.001
## n_ctv_hrs_c 0.000 0.471
## prv_n_ctv__ 0.000 -0.025 -0.028
m_dvdw_nlme <- lme(Oura_Sleep_hour ~ kap_c + n_active_hours_c + prev_n_active_hours_c,
data = dat_dvdw_oura,
random = ~ 1 | subject,
na.action = na.omit)
summary(m_dvdw_nlme)
## Linear mixed-effects model fit by REML
## Data: dat_dvdw_oura
## AIC BIC logLik
## 3595.784 3625.609 -1791.892
##
## Random effects:
## Formula: ~1 | subject
## (Intercept) Residual
## StdDev: 0.7277995 1.235264
##
## Fixed effects: Oura_Sleep_hour ~ kap_c + n_active_hours_c + prev_n_active_hours_c
## Value Std.Error DF t-value p-value
## (Intercept) 6.963658 0.12331942 1025 56.46846 0.0000
## kap_c 0.621613 0.04384981 1025 14.17596 0.0000
## n_active_hours_c -0.106959 0.04368049 1025 -2.44866 0.0145
## prev_n_active_hours_c -0.031052 0.03854545 1025 -0.80560 0.4207
## Correlation:
## (Intr) kap_c n_ct__
## kap_c -0.001
## n_active_hours_c 0.000 0.471
## prev_n_active_hours_c 0.000 -0.025 -0.028
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -3.88362503 -0.56064334 0.02198986 0.58905176 3.67013387
##
## Number of Observations: 1069
## Number of Groups: 41
plot(m_dvdw)
qqnorm(resid(m_dvdw))
qqline(resid(m_dvdw))
hist(resid(m_dvdw))
re <- ranef(m_dvdw)$subject %>%
rownames_to_column("subject") %>%
pivot_longer(!subject)
ggplot(re, aes(sample = value)) +
geom_qq() +
geom_qq_line() +
facet_wrap(~ name, scales = "free")
plot(predict_response(m_dvdw, terms = "kap_c"), show_data = TRUE) +
xlim(-4, 4)
## Data points may overlap. Use the `jitter` argument to add some amount of
## random variation to the location of data points and avoid overplotting.
kap_mean <- mean(dat_dvdw_oura$kap)
kap_sd <- sd(dat_dvdw_oura$kap)
pr <- predict_response(m_dvdw, terms = "kap_c") %>%
as.data.frame() %>%
mutate(x = kap_sd * x + kap_mean)
n_presses_breaks = c(400, 1100, 3000, 8100, 22000)
g_dvdw <- ggplot(pr, aes(x, predicted)) +
geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "grey") +
geom_line(color = "lightcoral", lwd = 1.2) +
geom_ribbon(aes(ymin = conf.low, ymax = conf.high), fill="lightcoral", alpha = 0.2) +
geom_point(
aes(kap, Oura_Sleep_hour),
data = dat_dvdw_oura) +
# scale_color_gradient(name = "Number of key presses", transform = "log",
# breaks = n_presses_breaks, labels = n_presses_breaks, high = "aquamarine3") +
scale_x_continuous(minor_breaks = ~ seq(0, 15)) +
labs(x = "Keystroke-absence period (hour)", y = "Oura-estimated sleep duration (hour)") +
theme_minimal() +
theme(
text = element_text(size = 20)
)
# ggsave(file.path(man_img_dir, "duration_prediction_dvdw.pdf"))
g_dvdw
r_squared <- r2mlm(m_dvdw)
# noquote class
vars <- r_squared$Decompositions[,1]
vars_df <- data.frame(variance = unclass(vars)) %>%
rownames_to_column("component") %>%
filter(component != "fixed, between", component != "slope variation") %>%
mutate(
component = case_when(
component == "fixed" ~ "Fixed effects",
component == "mean variation" ~ "Random intercept",
component == "sigma2" ~ "Residuals"
)
)
vars_df
cols <- rev(c("aquamarine2", "aquamarine3", "grey20"))
g_var_dvdw <- ggplot(vars_df, aes(1, variance)) +
geom_col(aes(fill = fct_rev(component))) +
scale_fill_manual(values = cols, name = "Variance component") +
labs(x = NULL, y = "Proportion of variance") +
scale_x_continuous(breaks = NULL) +
scale_y_continuous(position = "right") +
theme_minimal() +
theme(
text = element_text(size = 20),
legend.title = element_text(size = 16),
legend.text = element_text(size = 14)
)
g_var_dvdw
ggarrange(g_dvdw, g_var_dvdw, widths = c(5, 2))
# ggsave(file.path(man_img_dir, "duration_prediction_oura_var_dvdw.pdf"))